* Figure 5 Distribution of individual village outcomes by village-year treatment status
* Sample used to study investments and agricultural outcomes
* Last Updated, Niriksha Shetty 03/31/206

* Session setup: empty memory (data, Mata, matrices), raise the variable
* limit, and turn off output paging before loading the master file.
clear
clear mata
clear matrix
set maxvar 20000
set more off

* Display the working directory; the relative paths below resolve against it.
pwd
* The previous form, cd c(`pwd'), expanded an undefined local and always
* failed silently under capture. c(pwd) is the valid reference; this simply
* re-asserts the current directory and leaves it unchanged.
cap cd "`c(pwd)'"

estimates clear

use "../dta/reduced-form-master.dta", clear

* Restrict to the balanced panel: discard every observation whose
* bal_panel3 flag is anything other than 1
drop if bal_panel3 != 1

* Order observations by id and mkt_year
sort id mkt_year

* Shorter names for the two crop-share variables
rename fraction_cashcrop cashcrop
rename fraction_hy HY

* Variable labels; these are reused later as the titles of the CDF panels
* Agricultural outcomes
label variable kval_w1_real "Total agricultural revenues (INR)"
label variable cost_k_w1_real "Total agricultural costs (INR)"
label variable profit_k_w1_real "Total agricultural profits (INR)"
* Insurance outcomes
label variable fin_revenues_real "Total insurance revenues (INR)"
label variable fin_costs_real "Total insurance costs (INR)"
* Totals
label variable totalrevenues_w1_real "Total revenues (INR)"
label variable totalcosts_w1_real "Total costs (INR)"
label variable totalprofit_w1_real "Total profit (INR)"
* Cropping choices and cultivated area
label variable HY "Fraction of HYV crops"
label variable cashcrop "Fraction of cash-crops"
label variable plotsize_ha "Area cultivated (in hectares)"

* No village is treated in 2005, so force the treatment flag to zero there
replace Tvillage = 0 if mkt_year == 2005

// CDFs, step 1: residualise each outcome on mkt_year
// areg with no regressors and absorb(mkt_year) leaves residuals equal to the
// deviation from the mkt_year mean; adding back the overall sample mean gives
// <outcome>yhat, the series used for the CDFs and tests below.
foreach y of varlist cost_k_w1_real kval_w1_real profit_k_w1_real fin_costs_real fin_revenues_real totalcosts_w1_real totalrevenues_w1_real totalprofit_w1_real HY cashcrop plotsize_ha {
    areg `y', absorb(mkt_year)
    predict e`y'yhat, residuals
    quietly summarize `y'
    generate `y'yhat = r(mean) + e`y'yhat
}

// The insurance series are blanked out wherever Tvillage is zero
foreach y in fin_costs_real fin_revenues_real {
    replace `y'yhat = . if Tvillage == 0
}

* Empirical CDFs of each residualised outcome, computed separately for
* treat_year == 1 (suffix _T) and treat_year == 0 (suffix _C)
foreach y of varlist cost_k_w1_real kval_w1_real profit_k_w1_real fin_costs_real fin_revenues_real totalcosts_w1_real totalrevenues_w1_real totalprofit_w1_real HY cashcrop plotsize_ha {
    cumul `y'yhat if treat_year == 1, generate(CDF_`y'_T) equal
    label variable CDF_`y'_T "Treatment Group "

    cumul `y'yhat if treat_year == 0, generate(CDF_`y'_C) equal
    label variable CDF_`y'_C "Control Group"
}

* Two-sample distribution tests and CDF panels
* For every outcome this loop:
*   1. runs ksmirnov and escftest on <outcome>yhat, split by treat_year;
*   2. formats both p-values to two decimals for the graph note;
*   3. plots the treatment and control CDFs against <outcome>yhat, titled
*      with the outcome's variable label;
*   4. saves the panel as cdfs<outcome>.gph in the working directory.
* The two insurance outcomes additionally get a maroon vertical line at zero.
* The p-values are read straight from r(), so the data in memory are never
* modified and no preserve/restore round-trip is needed.
foreach var of varlist cost_k_w1_real kval_w1_real profit_k_w1_real fin_costs_real fin_revenues_real totalcosts_w1_real totalrevenues_w1_real totalprofit_w1_real HY cashcrop plotsize_ha {
    local l`var': variable label `var'

    * Kolmogorov-Smirnov test
    ksmirnov `var'yhat, by(treat_year)
    local a = string(r(p_cor), "%7.2f")

    * Epps-Singleton test
    escftest `var'yhat, group(treat_year)
    local b = string(r(p_val), "%7.2f")

    * Zero reference line only for the insurance cost/revenue panels
    local zeroline
    if inlist("`var'", "fin_costs_real", "fin_revenues_real") {
        local zeroline xline(0, lcolor(maroon))
    }

    * NOTE(review): size(s) does not look like a standard text size name;
    * kept as-is so the figure is unchanged - confirm whether "small" was meant.
    twoway (line CDF_`var'_T CDF_`var'_C `var'yhat, sort), ///
        title("`l`var''", size(s)) ylabel(, angle(30)) xlabel(, angle(30)) ///
        `zeroline' xtitle("") note("KS Test, `a' | Epps-Singleton, `b'") ///
        graphregion(color(white)) plotregion(icolor(white)) yscale(range(0 1))
    graph save cdfs`var', replace
}

* Combine the eleven saved CDF panels into one figure with a shared legend
* (grc1leg) and export it as Figure 5
local panels
foreach stub in cost_k_w1_real kval_w1_real profit_k_w1_real fin_costs_real fin_revenues_real totalcosts_w1_real totalrevenues_w1_real totalprofit_w1_real HY cashcrop plotsize_ha {
    local panels `panels' cdfs`stub'.gph
}
grc1leg `panels', graphregion(color(white) lwidth(large)) plotregion(icolor(white))

* NOTE(review): .emf export may only be available in Stata for Windows - confirm
graph export "../output/graphs/f5.emf", replace



